home *** CD-ROM | disk | FTP | other *** search
- \documentstyle[noweb,multicol]{article}
- \title{Converting {\LaTeX} to HTML}
- \author{Norman Ramsey\\{\tt norman@bellcore.com}}
- \noweboptions{smallcode}
- \setcounter{secnumdepth}{1}
- \begin{document}
- \pagenumbering{roman}
- \maketitle
- \tableofcontents
- \pagenumbering{arabic}
- \section{Introduction}
- This program provides an infrastructure for converting {\LaTeX} to
- HTML.
- That infrastructure can be used to make a {\tt noweb} filter or to
- make a standalone conversion program.
- The program is roughly divided into three parts.
- Section~\ref{cs-decls} assigns a meaning (treatment) to each control
- sequence.
- It is roughly declarative, and the hope is that one day it can
- be replaced by a data file, which could be augmented dynamically.
- (The central flaw in this program is that all {\TeX} control
- sequences must be hard-wired.)
- Section~\ref{engine} describes the engine used to do the conversion, and
- Sections \ref{imp-decl}~and~\ref{html-format} give the procedures that do the individual conversions.
- \section{Descriptions of control sequences}
- \label{cs-decls}
- This section defines behavior for each control sequence we know how to
- convert.
- The definitions have a declarative flavor, since most are done by
- procedure calls.
- These calls initialize the machinery described in Section~\ref{cs-tables}.
- {\LaTeX} control sequences come first, using
- the same organization as the quick reference card from the
- second edition of the {\LaTeX} manual.
- Other control sequences follow.
- \subsection{{\LaTeX} control sequences}
- \subsubsection{Sentences and paragraphs}
- <<control-sequence assignments>>=
- # Each substitution(name, html) makes the control sequence \name emit html verbatim.
- substitution(",", " ")
- substitution(" ", " ")
- substitution("\n", "\n")
- substitution("\t", " ")
- ignore("@")
- ignore("/") # no italic correction
- substitution("", "\n") # \<newline> treated as request for newline
- # the escaped TeX specials below stand for themselves in HTML
- every c := !"$%#{}_" do
-   substitution(c, c)
- # an ampersand is an HTML special, so \& must emit the entity, not a raw "&"
- substitution("&", "&amp;")
- argblock("emph", "em")
- argblock("footnote", "<b>[</b>", "<b>]</b>") # put footnotes in bold brackets
- argblock("footnotetext", "<b>[</b>", "<b>]</b>")
- argblock("thanks", "<b>[</b>", "<b>]</b>") # put footnotes in bold brackets
- substitution("today", &date)
- \subsubsection{Type style}
- <<control-sequence assignments>>=
- ignore("textrm") # html can't switch to default font!
- argblock("textit", "i")
- argblock("textbf", "b")
- argblock("textsl", "i")
- ignore("textsc")
- argblock("texttt", "tt")
- ignore("textsf")
- ignore("boldmath")
- # \boldmath could be done by introducing S.mathfont, but I don't want to!
- <<control-sequence assignments>>=
- ignore("mathrm") # html can't switch to default font!
- argblock("mathit", "i")
- argblock("mathbf", "b")
- argblock("mathtt", "tt")
- ignore("mathsf")
- argblock("mathcal", "i")
- @ HTML has only one size.
- <<control-sequence assignments>>=
- every ignore("tiny" | "scriptsize" | "footnotesize" | "small" | "normalsize" |
- "large" | "Large" | "LARGE" | "huge" | "Huge")
- \subsubsection{Accents and symbols}
- I couldn't find an official way to do symbols.
- Maybe when CERN comes back up I can find the ISO Latin~1 character set.
- <<control-sequence assignments>>=
- every accent(key(accent_name))
- every ignore("dag" | "ddag" | "S" | "P" | "copyright" | "pounds")
- \subsubsection{Sectioning and table of contents}
- <<control-sequence assignments>>=
- argblockv("part", "h1", &null, "*[")
- argblockv("chapter", "h1", &null, "*[")
- argblockv("section", "h2", &null, "*[")
- argblockv("subsection", "h3", &null, "*[")
- argblockv("subsubsection", "h4", &null, "*[")
- argblockv("paragraph", "h5", &null, "*[")
- argblockv("subparagraph", "h6", &null, "*[")
- ignore("appendix")
- auxfile("tableofcontents", "toc", "<p><b>[Table of contents]</b><p>",
- "<h2>Table of Contents</h2>")
- cstab["tableofcontents"] := Ctableofcontents # override to call set_toclevel
- ignore("listoftables")
- \subsubsection{Mathematical formulas}
- Here we see our first assignments to [[cstab]], which is the real
- technology underlying these seemingly declarative calls.
- I'll assign to [[cstab]] directly when some really special behavior is
- called for. In this case, it's going in and out of math mode.
- <<control-sequence assignments>>=
- # direct cstab assignments: these control sequences enter and leave math mode
- cstab["("] := Cmath
- cstab[")"] := Cmath_end
- cstab["["] := Cdisplaymath
- cstab["]"] := Cdisplaymath_end
- ignoreenv("equation")
- every table_env(star("eqnarray"), 0, " ", "blockquote") # also lame
- # no real rendering for these; just show the operator's own name in bold
- substitution("frac", "<b>frac</b>")
- substitution("sqrt", "<b>sqrt</b>")
- every substitution("ldots" | "cdots" | "vdots", "...")
- ignore("left")
- ignore("right")
- ignore("overline")
- substitution(":", " ")
- substitution(";", " ")
- ignore("!")
- @ The [[star]] procedure lets us define \verb+eqnarray+ and
- \verb+eqnarray*+ in one fell swoop.
- <<*>>=
- procedure star(cs)
-   # Generator: produces cs itself and then cs with "*" appended, so a
-   # control sequence or environment and its starred form can be declared
-   # with a single `every`.
-   suspend cs | (cs || "*")
- end
- There are a gazillion symbols. I'll add them on demand.
- <<control-sequence assignments>>=
- substitution("Diamond", "<>")
- substitution("langle", "<")
- substitution("rangle", ">")
- substitution("le", "<=")
- substitution("ge", ">=")
- substitution("bmod", "</i>mod<i>") # better hook in with math
- substitution("equiv", "===")
- \subsubsection{Displayed paragraphs}
- HTML really has only one kind of displayed paragraph---the block quotation.
- <<control-sequence assignments>>=
- envblock("quote", "blockquote")
- envblock("quotation", "blockquote")
- envblock("center", "blockquote")
- envblock("flushleft", "blockquote")
- envblock("flushright", "blockquote")
- envblock("verse", "blockquote")
- begintab["verbatim"] := Cverbatim
- begincl["verbatim"] := verbatim_cl("pre", "\\end{verbatim}")
- cstab["verb"] := Cverb
- \subsubsection{Lists}
- <<control-sequence assignments>>=
- cstab["item"] := Citem
- csclosure["item"] := [item_cl("<li>", "", "<li>")]
- listenv("itemize", "ul")
- listenv("enumerate", "ol")
- listenv("description", "dl")
- \subsubsection{???}
- <<control-sequence assignments>>=
- ignore("documentstyle", "[{")
- ignore("documentclass", "[{")
- ignore("usepackage", "[{")
- ignore("pagestyle", "{")
- ignore("pagenumbering", "{")
- \subsubsection{Title page and abstract}
- I could be clever and have \verb+\title+ have a side effect
- that sticks in the right boilerplate when we see \verb+\begin{document}+,
- but for now it's not worth the hassle.
- <<control-sequence assignments>>=
- argblockv("title", "h1")
- argblockv("author","address")
- argblockv("date", "b")
- substitution("maketitle", "<!--title goes here-->")
- ignoreenv("titlepage")
- envblock("abstract", "<h2>Abstract</h2><blockquote>", "</blockquote>")
- \subsubsection{Cross-reference}
- A more ambitious scheme would make labels anchor at preceding
- sectioning commands, but it's hard to see how to do that in one pass.
- Instead, I just use some conventional glyphs.
- I use special procedures for the cross-references so I can have an
- arrow pointing either forward or backward, depending on the direction
- of the reference.
- <<control-sequence assignments>>=
- cstab["label"] := Clabel
- cstab["ref"] := Cref
- cstab["pageref"] := Cref
- \subsubsection{Bibliography and citation}
- For the bibliography, I actually go grubbing for a {\tt .bbl} file if
- I can find one.
- <<control-sequence assignments>>=
- ignore("bibliographystyle", "{")
- auxfile("bibliography", "bbl", "<b>[BibTeX bibliography]</b>", &null, "{")
- envblock("thebibliography", "<h2>References</h2>", "", "{")
- cstab["cite"] := Ccite
- cstab["bibitem"] := Cbibitem
- ignore("newblock")
- ignore("nocite", "{")
- \subsubsection{Splitting the input}
- All input is ignored. Those things are in their own files.
- <<control-sequence assignments>>=
- every ignore("input" | "include" | "includeonly", "{")
- # filecontents not done yet
- \subsubsection{Line breaking}
- <<control-sequence assignments>>=
- substitution("\\", "<br>", "[")
- substitution("linebreak", "<br>")
- ignore("-")
- ignoreenv("sloppypar")
- ignore("sloppy")
- \subsubsection{Page breaking}
- I simulate forced page breaks by horizontal rules.
- <<control-sequence assignments>>=
- substitution("pagebreak", "<hr>")
- substitution("newpage", "<hr>")
- substitution("clearpage", "<hr>")
- ignore("enlargethispage", "*{")
- \subsubsection{Boxes}
- <<control-sequence assignments>>=
- ignore("mbox")
- ignore("makebox", "([[") # ( comes from picture area
- ignore("fbox")
- ignore("framebox", "[[") # could insert horizontal rules, but why?
- ignore("newsavebox", 1)
- ignore("sbox", 2)
- ignore("savebox", "{[[{")
- ignore("usebox", 1)
- envblock("minipage", "blockquote", &null, "[{")
- argblock("parbox", "blockquote", &null, "[{")
- \subsubsection{Space}
- <<control-sequence assignments>>=
- ignore("hspace", "*{")
- ignore("hfil")
- ignore("hfill")
- ignore("vspace", "*{")
- ignore("vfil")
- ignore("vfill")
- \subsubsection{Length}
- <<control-sequence assignments>>=
- ignore("newlength", "{")
- ignore("setlength", "{{")
- ignore("addtolength", "{{")
- \subsubsection{Pictures}
- <<control-sequence assignments>>=
- envblock("picture", "<b>[picture]</b>", "", "((")
- ignore("put", "({")
- ignore("multiput", "(({{")
- ignore("dashbox", "{([")
- ignore("line", "({")
- ignore("vector", "({")
- ignore("shortstack", "[")
- ignore("circle", "*{")
- ignore("oval", "([")
- ignore("frame")
- ignore("thinlines")
- ignore("thicklines")
- \subsubsection{Figures and Tables}
- I surround figures and tables with horizontal rules.
- <<control-sequence assignments>>=
- every envblock(star("figure"), "<hr>", "<hr>", "[")
- every envblock(star("table"), "<hr>", "<hr>", "[")
- argblock("caption", "b") # captions in bold
- \subsubsection{{\tt tabbing} environment}
- I can't see how to do anything sensible with {\tt tabbing}.
- <<control-sequence assignments>>=
- envblock("tabbing", "blockquote")
- # \= is accent
- ignore(">")
- ignore("+")
- ignore("kill")
- \subsubsection{{\tt array} and {\tt tabular} environment}
- <<control-sequence assignments>>=
- envblock("array", "blockquote", &null, "[{")
- envblock("tabular", "blockquote", &null, "[{")
- envblock("tabularx", "blockquote", &null, "[{")
- ignore("multicolumn", "{{")
- substitution("hline", "<hr>")
- ignore("cline", "{")
- \subsubsection{Definitions}
- <<control-sequence assignments>>=
- ignore("newcommand", "A[[{")
- ignore("renewcommand", "A[[{")
- ignore("newenvironment", "{[{{")
- ignore("renewenvironment", "{[{{")
- ignore("newtheorem", "{{")
- \subsubsection{Numbering}
- We have to have a special [[setcounter]] so we can ignore the right stuff
- in the table of contents.
- <<control-sequence assignments>>=
- cstab["setcounter"] := Csetcounter
- ignore("addtocounter", "{{")
- \subsubsection{Other {\LaTeX} control sequences}
- <<control-sequence assignments>>=
- cstab["makeatletter"] := Cmakeatletter
- cstab["makeatother"] := Cmakeatother
- Here are all the old-style font changes.
- <<control-sequence assignments>>=
- fontchange("tt", "tt")
- fontchange("bf", "b")
- fontchange("it", "i")
- fontchange("sl", "i")
- fontchange("em", "em")
- ignore("rm") # html can't switch to default font!
- ignore("sf")
- @ And some new ones
- <<control-sequence assignments>>=
- ignore("rmfamily")
- <<control-sequence assignments>>=
- ignoreenv("document")
- <<control-sequence assignments>>=
- substitution("LaTeX", "LaTeX")
- <<control-sequence assignments>>=
- ignore("numberline", "{")
- ignore("protect")
- ignore("twocolumn", "C")
- ignore("typeout", "[{")
- ignore("closedbib")
- <<control-sequence assignments>>=
- every ignore("leftmargini" | "leftmarginii" | "labelsep" | "fboxsep", "=")
- every ignore("tabcolsep", "=")
- \subsection{Control sequences from various {\LaTeX} packages}
- <<control-sequence assignments>>=
- ignoreenv("multicols", "{C")
- cstab["citeN"] := Ccite
- ignore("afterpage", "{")
- A (perhaps vain) attempt to implement \verb+\kill+.
- <<control-sequence assignments>>=
- cstab["kill"] := Ckill
- \subsection{Plain {\TeX} control sequences}
- <<control-sequence assignments>>=
- argblock("noalign", "<br>", "<br>") # not clear what else to do...
- argblock("centerline", "<br>", "<br>")
- substitution("cr", "<br>")
- substitution("hrule", "<hr>")
- substitution("vrule", "|")
- substitution("hrulefill", "------")
- ignore("hbox")
- ignore("rlap")
- ignore("llap")
- ignore("vbox")
- ignore("vtop")
- ignore("message", "{")
- ignore("relax")
- ignore("null")
- ignore("offinterlineskip")
- <<control-sequence assignments>>=
- cstab["par"] := implicit_paragraph
- cstab["smallskip"] := implicit_paragraph
- cstab["medskip"] := implicit_paragraph
- cstab["bigskip"] := implicit_paragraph
- cstab["vskip"] := implicit_paragraph
- csclosure["vskip"] := "="
- We can't give the grouping control sequences their real meaning, because
- that would blow our brace balance when ignoring definitions and the like.
- The proper solution would be to distinguish between grouping and braces,
- but that would require much more sophistication than we've got just now.
- <<control-sequence assignments>>=
- every ignore("begingroup" | "endgroup" | "bgroup" | "egroup")
- <<control-sequence assignments>>=
- cstab["newif"] := Cnewif
- cstab["iffalse"] := Ciffalse
- cstab["iftrue"] := Ciftrue
- cstab["else"] := Celse
- cstab["fi"] := Cfi
- cstab["ifx"] := cstab["if"] := cstab["ifnum"] := Ciffalse
- Lots of assignable things:
- <<control-sequence assignments>>=
- ignore("let", "A=")
- every ignore("hfuzz" | "parindent" | "parskip" | "baselineskip", "=")
- every ignore("hbadness" | "hsize" | "vsize" | "overfullrule" | "tabskip", "=")
- substitution("hskip", " ", "=")
- <<control-sequence assignments>>=
- ignore("unskip")
- ignore("hss")
- ignore("phantom", "{")
- every ignore("kern" | "lower" | "spacefactor", "=") # a cheat, but works
- every ignore("clubpenalty" | "widowpenalty", "=")
- @ Other stuff to be ignored:
- <<control-sequence assignments>>=
- every ignore("expandafter" | "indent" | "noindent" | "leavevmode" | "strut")
- ignore("def", 1)
- <<control-sequence assignments>>=
- substitution("TeX", "TeX")
- \subsection{Other control sequences}
- I get to include my favorite {\TeX} hacks.
- We define ignoring loosely; the count denotes the number of balanced-brace pairs.
- We also ignore everything before an ignored balanced-brace pair, which means
- it works for \verb+\def+.
- <<control-sequence assignments>>=
- ignore("noweboptions", 1)
- Now, here are a couple of righteous hacks!
- The idea is that most views will ignore this stuff, but the indexer might
- use it to get clever about dumping chunks and all in the right places.
- <<control-sequence assignments>>=
- substitution("nowebindex", "<nowebindex>")
- substitution("nowebchunks", "<nowebchunks>")
- ignore("nowebsize")
- <<control-sequence assignments>>=
- envblock("fields", "blockquote", &null, "[") # lame; could try to <tt> 1st col
- envblock("fields*", "blockquote", &null, "{") # lame; could try to <tt> 1st col
- ignore("citeauthoryear", "{{{")
- ignore("authoryear", "{{")
- substitution("bibrule", "--------")
- let("bibskip", "par")
- every cstab["anoncite"|"authorcite"] := Ccite
- This will always have to be patched by hand, but it may be worth it.
- <<control-sequence assignments>>=
- argblock("psfig", "<a href=\"", "\">PostScript</a>")
- ignore("pssilent")
- ignore("psnoisy")
- \section{The conversion engine}
- \label{engine}
- The converter doesn't have the luxury of working on the whole text at
- once; instead it has to accept and convert a piece at a time.
- If I really understood co-expressions, I would surely make them sit up
- and beg.
- Since I don't, I keep some state around, and I pass continuations and
- closures like there's no tomorrow.
- \subsection{Basic conversion}
- Here's the basic engine, which works by string scanning.
- The initial boilerplate sets up the second argument (if any) as
- [[&subject]].
- We have the odd specials [["\0"]] and [["\1"]], which are
- used to delimit quoted code in noweb.
- Woe betide the hapless user who has real nulls or 1s in his {\LaTeX} file.
- <<*>>=
- procedure convert(S, optstring)
-   # Convert one piece of LaTeX using converter state S and return the HTML
-   # produced so far.  With optstring present, it becomes &subject and the
-   # conversion is rerun on it; otherwise the current &subject is scanned.
-   static specials
-   initial {
-     # one-time setup of the control-sequence tables and the set of
-     # characters that interrupt plain-text accumulation
-     <<initialization>>
-     <<control-sequence assignments>>
-     <<assign to dynamic-add table>>
-     specials := '\\{}<>"%$&~\n\0\1'
-   }
-   if \optstring then return optstring ? convert(S)
-   else {
-     <<scan, convert, and return result>>
-   }
- end
- If I were a good dog, I would make a state diagram.
- Since I'm not, I'll just say that we either
- accumulate text using the function [[S.text]], which exists for that
- purpose, or else we do something special upon encountering a special character.
- The [[<<take actions appropriate to new text>>]]
- chunk may do something special with the text in
- case we're not in the default state (for example, we may be scanning
- for the end of a comment).
- Encountering a non-threatening character throws the converter into
- horizontal mode.
- <<scan, convert, and return result>>=
- <<take actions appropriate to new text>>
- # any non-threatening character moves us from vertical to horizontal mode
- if S.mode == "V" & any(~'\\{}<>%\n\t ') then S.mode := "H"
- emit_text(S, tab(upto(specials) | 0))
- while not pos(0) do
-   if S.mode == "Q" then { # quoting: pass text through untouched up to "\1"
-     emit_text(S, tab(upto('\1') | 0))
-     if ="\1" then {
-       emit_text(S, "\1")
-       S.mode := "H"
-     }
-   } else {
-     case move(1) of {
-       "\\" : {<<control sequence>>}
-       "{" : {<<take open-group actions>>}
-       "}" : {<<take close-group actions>>}
-       "%" : {<<comment>>}
-       "~" : emit_text(S, " ") # should be &nbsp; but netscape doesn't support it
-       "\n" : {<<newline>>}
-       "$" : {<<dollar sign>>}
-       "&" : {<<ampersand>>}
-       "\0" : {S.mode := "Q"; emit_text(S, "\0")}
-       # remaining cases simply escape HTML specials
-       "<" : emit_text(S, "&lt;")
-       ">" : emit_text(S, "&gt;")
-       "\"" : emit_text(S, "&quot;")
-     }
-     # after handling one special, copy plain text up to the next special
-     emit_text(S, tab(upto(specials) | 0))
-   }
- return 1(. S.the_text, S.the_text := "") # what's been converted
- The definition of a converter's state is distributed.
- We've already seen the use of [[mode]].
- <<*>>=
- record state(mode <<other fields of state>>)
- # mode is H, V, or M
- When creating a new state, the default mode is vertical.
- <<*>>=
- procedure converter(mode)
- /mode := "V"
- return state(mode <<initial values for other fields of state>>)
- To avoid repeated memory allocation, we provide a routine to reset a
- converter to its initial state.
- <<*>>=
- procedure reset(S)
- <<code to reset [[S]]>>
- return S
- The basic action performed by the
- [[S.text]] function is to accumulate converted text in [[S.the_text]].
- [[S.text]] is usually [[accumulate_text]].
- <<*>>=
- procedure accumulate_text(S, text)
- S.the_text ||:= text
- return
- <<other fields of state>>=
- , text, the_text
- <<initial values for other fields of state>>=
- , accumulate_text, ""
- <<code to reset [[S]]>>=
- S.text := accumulate_text
- S.the_text := ""
- [[emit_text]] just uses the current value of [[S.text]], provided we aren't
- currently ignoring tokens.
- Its primary use is to appear in closures, when we don't know what
- [[S.text]] will be when the closure is executed.
- <<*>>=
- procedure emit_text(S, text)
- return if \S.ignoring then "" else S.text(S, text)
- <<other fields of state>>=
- , ignoring
- <<initial values for other fields of state>>=
- , &null
- <<code to reset [[S]]>>=
- S.ignoring := &null
- \subsection{Action and continuation hooks}
- We provide hooks so that actions can be taken at various points.
- The major ones are:
- \begin{description}
- \item[\tt newtext]
- When the next string is passed in for conversion.
- \item[open brace]
- After the next open brace or begin environment.
- \item[close brace]
- Before the next close brace or end environment.
- \end{description}
- \subsubsection{{\tt newtext}}
- [[newtext]] is a list of closures to be executed (actions to take)
- when the next input comes.
- <<other fields of state>>=
- , newtext
- <<initial values for other fields of state>>=
- , []
- <<code to reset [[S]]>>=
- S.newtext := [] # no pending new-text actions; same value as the initial one
- A closure is simply a procedure with arguments.
- <<*>>=
- record closure(proc, args)
- [[before_next_newtext]] and [[after_next_newtext]]
- add to the list of actions to be taken (at the left and right, respectively).
- <<*>>=
- procedure before_next_newtext(S, proc, args)
- push(S.newtext, closure(proc, args))
- procedure after_next_newtext(S, proc, args)
- put(S.newtext, closure(proc, args))
- When taking the actions, be careful to avoid an infinite loop, e.g., on empty lines.
- <<take actions appropriate to new text>>=
- l := S.newtext
- S.newtext := []
- while c := get(l) do
- c.proc!c.args
- Some control sequences temporarily override all actions to be taken on
- a new input, using [[delay_newtext]].
- [[undelay_newtext]] restores actions.
- <<*>>=
- procedure delay_newtext(S)
- S.delayed_newtext := S.newtext
- S.newtext := []
- return
- procedure undelay_newtext(S)
- S.newtext := \S.delayed_newtext |
- {write(&errout, "This can't happen: null delayed_newtext"); &null[0]}
- S.delayed_newtext := &null
- <<other fields of state>>=
- , delayed_newtext
- <<initial values for other fields of state>>=
- , &null
- <<code to reset [[S]]>>=
- S.delayed_newtext := &null
- \subsubsection{Opening and closing groups}
- There's only one list of actions to be taken at the next open,
- but there's a whole stack of lists of actions to be taken at closes.
- <<other fields of state>>=
- , open, closes
- <<initial values for other fields of state>>=
- , [], []
- <<code to reset [[S]]>>=
- every S.open | S.closes := []
- <<*>>=
- procedure after_next_open(S, proc, args)
- return put(S.open, closure(proc, args))
- procedure before_next_close(S, proc, args)
- return push(S.closes[1], closure(proc, args)) # lost at top level
- procedure after_next_close(S, proc, args)
- return put(S.closes[1], closure(proc, args)) # lost at top level
- <<take open-group actions>>=
- push(S.closes, []) # fresh set of closing tasks
- while c := get(S.open) do
- c.proc!c.args
- <<take close-group actions>>=
- while c := get(S.closes[1]) do
- c.proc!c.args
- pop(S.closes)
- <<old>>=
- procedure Cbegingroup(S, cs, cl)
- <<take open-group actions>>
- <<old>>=
- procedure Cendgroup(S, cs, cl)
- <<take close-group actions>>
- <<old control-sequence assignments>>=
- cstab["begingroup"] := Cbegingroup
- cstab["endgroup"] := Cendgroup
- cstab["bgroup"] := Cbegingroup
- cstab["egroup"] := Cendgroup
- \subsection{Handling control sequences and environments}
- OK, to eat a control sequence, first scan it, then execute it using [[do_cs]].
- [[S.csletters]] records the current set of ``letters'' for control
- sequences (so we can interpret \verb+\makeatletter+).
- <<control sequence>>=
- cs := if pos(0) then ""
- else if any(S.csletters) then tab(many(S.csletters))
- else move(1)
- if /S.ignoring | cs == ("else"|"fi") | cstab[cs] === (Ciffalse|Ciftrue) then
- do_cs(S, cs)
- &null # error("### Ignoring \\", cs)
- <<other fields of state>>=
- , csletters
- <<initial values for other fields of state>>=
- , &letters
- <<code to reset [[S]]>>=
- S.csletters := &letters
- To execute a control sequence, look up its procedure in [[cstab]],
- and pass in the name of the control sequence, plus the closure
- argument from [[csclosure]].
- \label{cs-tables}
- <<*>>=
- global cstab, csclosure
- procedure do_cs(S, cs)
- tab(many(' \t')) # skip white space following CS
- if pos(0) | any('\n') then before_next_newtext(S, skipblanks, [S])
- (cstab[cs])(S, cs, csclosure[cs])
- return
- <<initialization>>=
- cstab := table(unknown_cs)
- csclosure := table()
- The default action for an unknown control sequence is [[unknown_cs]].
- If the global [[show_unknowns]] is set we dump the control sequence into the
- output in bold. We save the unknown sequences for later warning messages.
- <<*>>=
- global show_unknowns
- procedure unknown_cs(S, cs, cl)
-   # Default cstab entry, used for any control sequence with no assignment.
-   # Optionally shows \cs in bold in the output, and writes a warning to
-   # unknown_file (when it is set) the first time each name is seen.
- # if S.text === ignore_text then return # a bit of a hack -- should no longer be needed
-   if \show_unknowns then S.text(S, "<b>\\" || cs || "</b>")
-   if not member(unknown_set, cs) then {
-     write(\unknown_file, "Warning: unknown control sequence \\", cs)
-     insert(unknown_set, cs) # remember it so the warning is not repeated
-   }
-   return
- end
- <<initialization>>=
- unknown_set := set()
- <<*>>=
- global cstab, csclosure, unknown_set
- The control sequences \verb+\begin+ and \verb+\end+ are treated
- specially,
- so we can have a similar machinery for environments.
- <<*>>=
- global begintab, endtab, begincl, endcl
- procedure do_begin(S, cs, cl)
- (="{", env := tab(upto('}')), ="}") | error("botched \\begin{...}")
- <<take open-group actions>>
- (begintab[env])(S, env, begincl[env])
- return
- procedure do_end(S, cs, cl)
- (="{", env := tab(upto('}')), ="}") | error("botched \\end{...}")
- # write(&errout, "calling ", image(endtab[env]), " for \\end{", env, "}")
- (endtab[env])(S, env, endcl[env])
- <<take close-group actions>>
- return
- <<control-sequence assignments>>=
- cstab["begin"] := do_begin
- cstab["end"] := do_end
- <<initialization>>=
- every begintab | endtab := table(unknown_env)
- every begincl | endcl := table()
- <<*>>=
- procedure unknown_env(S, env, cl)
-   # Default begintab/endtab entry, used for any environment with no
-   # assignment.  Optionally shows {env} in bold in the output, and writes a
-   # warning to unknown_file (when it is set) the first time each name is seen.
- ### if S.text === ignore_text then return # a bit of a hack # no longer needed
-   if \show_unknowns then S.text(S, "<b>{" || env || "}</b>")
-   if not member(unknown_envs, env) then {
-     write(\unknown_file, "Warning: unknown environment {", env, "}")
-     insert(unknown_envs, env) # remember it so the warning is not repeated
-   }
-   return
- end
- <<initialization>>=
- unknown_envs := set()
- <<*>>=
- global unknown_envs
- \subsection{Issuing warnings about unknown control sequences and environments}
- <<*>>=
- procedure warn_unknown(s, type, mark, rmark)
- if *s > 0 then {
- pushout("Unknown " || type || ": ")
- every pushout(((\mark | "")\1) || !sort(s) || ((\rmark | "")\1) || " ")
- pushout("\n")
- <<*>>=
- procedure pushout(s)
- static col
- initial col := 0
- if find("\n", s) then
- s ? {
- pushout(tab(upto('\n')))
- while ="\n" do {col := 0; write(&errout)}
- pushout(tab(0))
- }
- else {
- col +:= *s
- if col >= 79 then {writes(&errout, "\n "); col := *s + 2}
- writes(&errout, s)
- return
- \subsection{Procedures related to parsing {\TeX}}
- \subsubsection{Comment-skipping}
- This logic gobbles text into [[S.comment]]
- until a newline is encountered, at which point it calls
- [[Ccomment]] to format the comment.
- All other new-text actions go on hold until the comment is over.
- <<comment>>=
- parse_dynamic_add()
- delay_newtext(S)
- eat_comment(S)
- <<*>>=
- procedure eat_comment(S)
-   # Accumulate comment text in S.comment up to (not including) the next
-   # newline.  If the input ends first, arrange to continue with the next
-   # input; otherwise restore the delayed new-text actions, format the
-   # comment with Ccomment, and clear the buffer.
-   S.comment ||:= tab(upto('\n') | 0)
-   if pos(0) then
-     before_next_newtext(S, eat_comment, [S])
-   else {
-     undelay_newtext(S)
-     Ccomment(S)
-     S.comment := ""
-   }
-   return
- end
- <<other fields of state>>=
- , comment
- <<initial values for other fields of state>>=
- , ""
- <<code to reset [[S]]>>=
- S.comment := "" # empty comment buffer; same value as the initial one
- Verbatim text is a little bit like comment text.
- For a verbatim environment, we have a tag for the corresponding HTML,
- plus a string that terminates the environment.
- <<*>>=
- record verbatim_cl(html, terminator)
- procedure Cverbatim(S, cs, cl)
- S.text(S, tag(\cl.html))
- delay_newtext(S)
- do_verbatim(S, cl)
- return
- If we find the terminator, we're finished.
- Otherwise, we swallow the whole input and make sure our action on next
- input is to continue scanning.
- <<*>>=
- procedure do_verbatim(S, cl)
-   # Copy verbatim text up to cl.terminator.  If the terminator is in the
-   # current input, consume it, emit the closing tag, and restore the
-   # delayed new-text actions.  Otherwise emit the rest of the input and
-   # arrange to keep scanning when the next input arrives.
-   if verbatimout(S, tab(find(cl.terminator))) then {
-     =cl.terminator
-     S.text(S, endtag(\cl.html))
-     undelay_newtext(S)
-   } else {
-     verbatimout(S, tab(0))
-     before_next_newtext(S, do_verbatim, [S, cl])
-   }
-   return
- end
- When writing verbatim text, we still have to convert HTML specials.
- <<*>>=
- procedure verbatimout(S, s)
-   # Emit string s through S.text, replacing each HTML special character
-   # (& < > ") with its character entity; all other text is passed
-   # through unchanged.
-   s ? {
-     while S.text(S, tab(upto('&<>"'))) do
-       case move(1) of {
-         "\"" : S.text(S, "&quot;")
-         "&" : S.text(S, "&amp;")
-         "<" : S.text(S, "&lt;")
-         ">" : S.text(S, "&gt;")
-       }
-     S.text(S, tab(0)) # whatever follows the last special character
-   }
-   return
- end
- The \verb+\verb+ control sequence's terminator is the first character
- following \verb+\verb+.
- <<*>>=
- procedure Cverb(S, cs, cl)
- Cverbatim(S, cs, verbatim_cl("tt", move(1)))
- return
- \subsubsection{Arguments}
- It's occasionally necessary to collect the argument of a control
- sequence.
- [[csarg]] does the job.
- <<*>>=
- procedure csarg(S)
- return 2(="{", tab(bal('}', '{', '}')), ="}") |
- (optwhite(),
- if ="\\" then
- "\\" || (tab(many(S.csletters)) | move(1))
- else
- move(1))
- \subsubsection{Misc specials}
- Ampersand is weak --- I just use some string depending on the environment.
- Tables look sort of OK.
- Notice that ampersands close and open groups.
- <<ampersand>>=
- <<take close-group actions>>
- emit_text(S, S.ampersand)
- <<take open-group actions>>
- <<other fields of state>>=
- , ampersand
- <<initial values for other fields of state>>=
- , " --- "
- The dollar sign is for entering and exiting math mode:
- <<dollar sign>>=
- if /S.ignoring then
- if ="$" then
- if S.mode == "M" then { Cdisplaymath_end(S); S.mode := "V" }
- else { Cdisplaymath(S); S.mode := "M" }
- else
- if S.mode == "M" then { Cmath_end(S); S.mode := "H" }
- else { Cmath(S); S.mode := "M" }
- Newlines emit themselves, plus start skipping blanks until they get to
- some nonblank text.
- We have to identify a blank line so we can insert a paragraph marker.
- <<newline>>=
- emit_text(S, "\n")
- if /S.ignoring then Cnewline(S)
- <<*>>=
- procedure Cnewline(S)
- tab(many(' \t'))
- if match("\n") then implicit_paragraph(S)
- if pos(0) then before_next_newtext(S, Cnewline, [S])
- Other procedures might want to skip white space, which includes
- newlines, but we don't want to miss a paragraph.
- <<*>>=
- procedure skipblanks(S)
- tab(many(' \t'))
- if ="\n" then Cnewline(S)
- else if pos(0) then before_next_newtext(S, skipblanks, [S])
- Paragraphs count only in horizontal or math mode (and they better not
- happen in math mode!).
- <<*>>=
- procedure implicit_paragraph(S, cs, cl)
- if S.mode ~== "V" then {
- S.mode := "V"
- Cparagraph(S)
- cs_ignore(S, cs, \cl)
- Here's a real hack. I use it to stop skipping blanks when the noweb
- filter sees text quoted by [[[[...]]]].
- That text is never converted, but we don't want to skip blanks that
- follow it.
- <<*>>=
- procedure stop_skipping(S)
- while S.newtext[1].proc === (Cnewline|skipblanks) do pop(S.newtext)
- \subsubsection{Items}
- For items, we actually want to do something with the optional arguments,
- namely, convert them.
- We wrap them in braces so that any font changes and so on will be
- appropriately limited in their effects.
- <<*>>=
- record item_cl(before, after, ifnone)
- procedure Citem(S, cs, cl)
- if pos(0) then
- after_next_newtext(S, Citem, [S, cs, cl])
- else if ="[" then {
- delay_newtext(S)
- with_upto_bracket(S, "", convert_bracketed, cl)
- } else {
- skipblanks(S)
- S.text(S, cl[1].ifnone)
- <<*>>=
- procedure convert_bracketed(S, contents, cl)
- S.text(S, cl[1].before ||
- convert(converter("H"), "{" || contents || "}") ||
- cl[1].after)
- optwhite()
- <<*>>=
- procedure listenv(env, html)
- begintab[env] := Clist
- begincl[env] := html
- endtab[env] := Clist_end
- endcl[env] := html
- procedure Clist(S, cs, cl)
- S.text(S, tag(cl))
- push(csclosure["item"],
- if cs == "description" then item_cl("<dt>", "<dd>", "<dt><dd>")
- else item_cl("<li>", "--", "<li>"))
- procedure Clist_end(S, cs, cl)
- S.text(S, endtag(cl))
- pop(csclosure["item"])
- \subsubsection{Labels and references}
- These could be done by [[argblock]], except I want to make it possible to have
- different text depending on whether the references point forward or backward.
- <<*>>=
- global labels_seen
- procedure Clabel(S, cs, cl)
- initial /labels_seen := set()
- insert(labels_seen, l := csarg(S)) | fail
- S.text(S, "<a name=\"" || l || "\"><b>[*]</b></a>")
- procedure Cref(S, cs, cl)
- initial /labels_seen := set()
- l := csarg(S) | fail
- S.text(S, "<a href=\"#" || l || "\">[" ||
- (if member(labels_seen, l) then "<-" else "->") || "]</a>")
- \subsubsection{Citations}
- The important thing about a citation key is that it makes a hot line
- to the appropriate item in the bibliography.
- [[Ccite]] and [[Cbibitem]] work together to make it happen.
- Optional arg might contain blanks, so it might be split, but
- I assume the citation key isn't split between inputs.
- <<*>>=
- procedure Ccite(S, cs, cl, bracketed_text) # \cite handler; bracketed_text is not used in this body
- if ="[" then { # optional commentary: gather it, then continue in do_cite
- delay_newtext(S)
- with_upto_bracket(S, "", do_cite, cl)
- } else
- do_cite(S, &null, cl) # no commentary
- procedure do_cite(S, commentary, cl) # emit the citation link; commentary is the optional-argument text or null
- local key
- if \commentary then
- optwhite() # skip blanks between "]" and the key
- if pos(0) then before_next_newtext(S, do_cite, [S, commentary, cl]) # key not in this input: re-register with the same arguments
- else {
- key := csarg(S)
- \commentary := convert(converter("H"), "{" || \commentary || "}") # converted only when commentary is non-null
- S.text(S, "<b>[cite <a href=\"#NWcite-" || key || "\">" || key || "</a>" || # same NWcite- name that Cbibitem uses for its anchor
- (("<i>, " || \commentary || "</i>") | "" ) ||
- "]</b>")
- <<*>>=
- procedure Cbibitem(S, cs, cl) # \bibitem: emit an anchor named NWcite-key, labelled by the optional argument or a running number
- local label, key
- static counter # numbers the items that carry no explicit label
- initial counter := 0
- if ="[" then { # explicit label: gather it, then finish in finish_bibitem
- delay_newtext(S)
- with_upto_bracket(S, "", finish_bibitem, [])
- } else {
- label := "<b>[" || (counter +:= 1) || "]</b>"
- key := csarg(S) | fail
- S.text(S, "<br><a name=\"NWcite-" || key || "\">" || label || "</a> ")
- procedure finish_bibitem(S, contents, args) # continuation for a \bibitem with a bracketed label; args is not used here
- local key, label
- optwhite()
- key := csarg(S) | fail
- label := convert(converter("H"), "{" || contents || "}") # the converted bracketed text becomes the label
- S.text(S, "<br><a name=\"NWcite-" || key || "\">" || label || "</a> ")
- \subsubsection{Conditionals}
- The idea here is that an \verb+\if+$\cdots$ control sequence will conditionally
- ignore text, and that \verb+\fi+ restores the previous state.
- To keep track of state, we have an ``if stack'' that records what
- [[S.ignoring]] should be upon encountering \verb+\else+ and \verb+\fi+.
- <<other fields of state>>=
- , ifstack
- <<initial values for other fields of state>>=
- <<code to reset [[S]]>>=
- if *S.ifstack > 0 then S.ifstack := [] # keeps GC down
- What's on the ifstack is
- <<*>>=
- record ifrec(on_else, on_fi)
- @ It's possible that one day this code will need to be updated to delay
- new-text actions (and to do God knows what if
- new-text actions have already been delayed).
- Every \verb+\if+$\cdots$ is equivalent either to \verb+\iffalse+
- or \verb+\iftrue+, so we begin by defining those, as well as \verb+\else+
- and \verb+\fi+.
- <<*>>=
- procedure Ciffalse(S, cs, cl) # false conditional: record the current S.ignoring for both \else and \fi, then set it to 1
- #error("### \\", cs, " -> false (S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)}, ")")
- push(S.ifstack, ifrec(S.ignoring, S.ignoring)) # ifrec fields are on_else, on_fi
- S.ignoring := 1
- procedure Ciftrue(S, cs, cl) # true conditional: S.ignoring is left alone; \else will set it to 1
- #error("### \\", cs, " -> true (S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)}, ")")
- push(S.ifstack, ifrec(1, S.ignoring))
- procedure Celse(S, cs, cl) # \else: adopt the on_else value of the innermost conditional
- S.ignoring := S.ifstack[1].on_else
- #error("### \\else -> S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)})
- procedure Cfi(S, cs, cl) # \fi: adopt the on_fi value, then pop the innermost conditional
- S.ignoring := S.ifstack[1].on_fi
- #error("### \\fi -> S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)})
- pop(S.ifstack)
- Now, all that's left is to handle \verb+\newif+.
- This part is all boilerplate.
- <<*>>=
- procedure Cnewif(S, cs, cl) # \newif: register the three control sequences derived from its argument
- local newif, newcs
- tab(many(' \t\n')) # skip white space, newlines included
- if pos(0) then
- after_next_newtext(S, Cnewif, [S, cs, cl]) # argument not in this input: re-register with the same arguments
- else {
- newif := csarg(S)
- newif ?
- if ="\\if" & newcs := tab(many(S.csletters)) & pos(0) then { # argument must be \if followed only by control-sequence letters
- <<make [[newcs]] a new \verb+\if+-like thing>>
- } else
- error("\\newif argument botch: " || newif)
- And here we do the real work:
- <<make [[newcs]] a new \verb+\if+-like thing>>=
- cstab[newcs || "false"] := Csetif
- cstab[newcs || "true"] := Csetif
- cstab["if" || newcs] := Ciffalse
- <<*>>=
- procedure Csetif(S, cs, cl) # handles the ...true and ...false control sequences by rebinding the matching if... entry in cstab
- local base, tag # local tag hides the tag procedure inside this body
- if cs ? (base := tab(find("true"|"false")), tag := =("true"|"false"), pos(0)) then { # split cs into base name and true/false suffix
- cstab["if" || base] := if tag == "true" then Ciftrue else Ciffalse
- } else {
- error("This can't happen --- setif botch (not urgent)")
- \subsection{Reading and converting auxiliary {\LaTeX} files}
- <<*>>=
- procedure auxfile(cs, ext, placeholder, header, ignore) # declare cs as a control sequence handled by Cauxfile with the given closure
- cstab[cs] := Cauxfile
- csclosure[cs] := aux_cl(ext, placeholder, header, \ignore | "") # ignore template defaults to the empty string
- [[Cauxfile]] succeeds if it finds a file, fails otherwise.
- <<*>>=
- record aux_cl(ext, placeholder, header, ignore)
- procedure Cauxfile(S, cs, cl) # convert the file named basename(curfile) plus "." plus cl.ext, if it can be opened
- local auxfile, T # local auxfile hides the auxfile procedure inside this body
- if auxfile := open(basename(\curfile) || "." || cl.ext) then { # also fails when curfile is null
- T := converter("V") # separate converter used for the file's lines
- Cmakeatletter(T) # adds '@' to T.csletters
- S.text(S, \cl.header) # header is emitted only when non-null
- while line := read(auxfile) do
- S.text(S, convert(T, line || "\n"))
- close(auxfile)
- } else {
- S.text(S, \cl.placeholder) # no file: emit the placeholder when one was given
- cs_ignore(S, cs, cl.ignore)
- if \auxfile then return # succeed only when a file was opened
- <<*>>=
- procedure basename(name) # return name without its last "."-suffix
- reverse(name) ? { # scan the reversed string so the first "." met is the last one in name
- tab(upto('.')) & ="." # if there is no ".", this fails and the position stays at the start
- return reverse(tab(0)) # so a name without "." comes back unchanged
- \subsubsection{Table of contents}
- We can build a table of contents by reading the .toc file.
- Sadly, I haven't figured out how to get hot links yet.
- <<control-sequence assignments>>=
- cstab["contentsline"] := Ccontentsline
- <<*>>=
- procedure Ctableofcontents(S, cs, cl) # \tableofcontents: switch to mode "V", convert the auxiliary file, reset the toc level
- S.mode := "V"
- Cauxfile(S, cs, cl)
- set_toclevel(S) # no level argument: return to the initial level
- [[set_toclevel]] manages the starting and ending of lists.
- With no level argument, it resets the toc to the initial level.
- <<*>>=
- procedure set_toclevel(S, l) # emit list open/close tags until the toc nesting level equals l
- static toclevel, initiallevel # both persist across calls and are never cleared here
- if /initiallevel := \l then # first call that supplies a level: remember it and open the outer list
- S.text(S, "<ul compact>")
- if /l := \initiallevel then # no level supplied: target the initial level and close the outer list
- S.text(S, "</ul>")
- if /l then return # never set a level
- /toclevel := l
- while toclevel < l do { # going deeper: open one list per level
- S.text(S, "<ul compact>")
- toclevel +:= 1
- while toclevel > l do { # coming back up: close one list per level
- S.text(S, "</ul>")
- toclevel -:= 1
- return
- Assume one table of contents per converted document.
- <<*>>=
- procedure Ccontentsline(S, cs, cl) # \contentsline: emit a list item at the nesting level given by the section type
- local type, level # NOTE(review): neither is used below; the body assigns l instead
- static leveltab # section type name -> level, filled once by the initial clause
- initial { <<assign numbers of sections in leveltab>> }
- l := \leveltab[csarg()] | fail # unknown section type: fail. NOTE(review): csarg is called without S here -- confirm
- if l > \countertab["tocdepth"] then # compared only when the tocdepth counter has been set
- cs_ignore(S, cs, "{{") # skip this one
- else {
- set_toclevel(S, l)
- S.text(S, "<li>")
- after_next_open(S, after_next_close, [S, cs_ignore, [S, cs, "{"]]) # schedules cs_ignore(S, cs, "{") through the open and close hooks
- <<assign numbers of sections in leveltab>>=
- l := ["part", "chapter", "section", "subsection", "subsubsection",
- "paragraph", "subparagraph"]
- leveltab := table()
- every i := 1 to *l do
- leveltab[l[i]] := i - 2 # making section level 1
- \subsubsection{Counters}
- <<*>>=
- global countertab
- procedure Csetcounter(S, cs, cl) # \setcounter: store the integer value of the second argument in countertab under the first
- local counter
- (counter := csarg(), countertab[counter] := integer(csarg())) | # NOTE(review): csarg is called without S here -- confirm
- cs_ignore(S, cs, "{{") # taken when the arguments cannot be read as a name and an integer
- <<initialization>>=
- countertab := table()
- \subsubsection{Accents}
- This info is taken from the HTML RFC, section entitled
- ``ISO Latin~1 character entities.''
- <<*>>=
- global accent_name, accent_valid
- <<initialization>>=
- accent_name := table()
- accent_valid := table('')
- accent_name ["`"] := "grave"
- accent_valid["`"] := 'AEIOUaeiou'
- accent_name ["'"] := "acute"
- accent_valid["'"] := 'AEIOUYaeiouy'
- accent_name ["^"] := "circ"
- accent_valid["^"] := 'AEIOUaeiou'
- accent_name ["\""] := "uml"
- accent_valid["\""] := 'AEIOUaeiouy'
- accent_name ["~"] := "tilde"
- accent_valid["~"] := 'ANOano'
- accent_name ["="] := "bar"
- accent_name ["."] := "dot"
- accent_name ["u"] := "u"
- accent_name ["v"] := "v"
- accent_name ["H"] := "H"
- accent_name ["t"] := "t"
- accent_name ["c"] := "cedil"
- accent_valid["c"] := 'Cc'
- accent_name ["d"] := "underdot"
- accent_name ["b"] := "underbar"
- Initialization calls [[accent]] to indicate that a control
- sequence represents an accent.
- In fact, [[accent]] is called on all keys of [[accent_name]].
- <<*>>=
- procedure accent(cs) # declare cs as an accent control sequence
- cstab[cs] := Caccent
- procedure Caccent(S, cs, cl) # emit an HTML entity for an accented letter when accent_valid[cs] allows it
- static warned # table from cs to the set of arguments already warned about
- initial warned := table()
- arg := csarg(S) | return
- if *arg = 1 & any(accent_valid[cs], arg) then # single character that is valid for this accent
- S.text(S, "&" || arg || accent_name[cs] || ";") # for example cs "'" with arg "e" gives &eacute;
- else {
- <<warn about [[cs]] with [[arg]]>>
- S.text(S, arg) # unsupported combination: emit the argument without the accent
- <<warn about [[cs]] with [[arg]]>>=
- /warned[cs] := set()
- if not member(warned[cs], arg) then {
- write(&errout, "Warning: Can't handle \\", cs, " with arg `", arg, "'")
- insert(warned[cs], arg)
- \subsection{Font changes}
- A font change changes the font until the next close, when we need to emit
- the appropriate end tag.
- <<*>>=
- procedure fontchange(tex, html) # declare control sequence tex as a font change rendered with HTML tag html
- cstab[tex] := Cfontchange
- csclosure[tex] := html
- <<*>>=
- procedure Cfontchange(S, tex, html) # emit the opening tag now and arrange for the closing tag before the next close
- S.text(S, tag(html))
- before_next_close(S, emit_text, [S, endtag(html)])
- \section{Implementations of declaratives}
- \label{imp-decl}
- \subsection{Ignoring stuff}
- There are several different kinds of things that can be ignored:
- ordinary arguments,
- balanced-brace arguments, optional arguments, assignments (which may
- include dimensions), stars, and parenthesized coordinates.
- We ignore a sequence of these things by supplying a template to
- [[ignore]], in which each character stands for something to be ignored.
- We've already seen examples of these things in Section~\ref{cs-decls}.
- We can ignore arguments of control sequences or environments.
- In either case, [[cs_ignore]] does the work.
- <<*>>=
- procedure ignore(cs, template) # declare control sequence cs as ignored, along with the arguments template describes
- /template := "" # default: nothing beyond the control sequence itself
- cstab[cs] := cs_ignore
- csclosure[cs] := template
- procedure ignoreenv(env, template) # same for an environment: ignore the arguments at its begin, do nothing at its end
- /template := ""
- begintab[env] := cs_ignore
- begincl[env] := template
- endtab[env] := do_nothing
- Because ignoring may span many inputs, all [[cs_ignore]] does is set things
- up to call [[do_ignore]].
- The major setup is saving [[S.ignoring]] and setting it to 1; [[do_ignore]] restores the saved value once the template is exhausted.
- Oh, and it converts an integer template
- into that many arguments, for historical reasons.
- <<*>>=
- procedure cs_ignore(S, cs, template, proc, args) # start ignoring what template describes; proc and args are passed on to do_ignore
- local saved_ignore
- saved_ignore := S.ignoring # do_ignore restores this once the template is exhausted
- S.ignoring := 1
- if type(template) == "integer" then template := repl("{", template) # integer n stands for n braced arguments
- return do_ignore(S, template, saved_ignore, proc, args)
- Some things are easily ignored (partly because we assume they don't
- span inputs). For others, we have special procedures.
- The brace-ignoring stuff uses the open and close hooks, because braces
- can be nested deeply.
- If non-null, [[proc]] is applied to [[args]] after everything is ignored.
- <<*>>=
- procedure do_ignore(S, template, saved_ignore, proc, args) # consume the item named by template[1], then continue with template[2:0]
- if *template > 0 then
- if optwhite() & pos(0) then # nothing but white space left in this input: re-register with the same arguments
- after_next_newtext(S, do_ignore, [S, template, saved_ignore, proc, args])
- else
- case template[1] of {
- "{" : { S.ignoring := 1 # braced argument: ignore_til_close continues at the matching close
- after_next_open(S, ignore_til_close,
- [S, template[2:0], saved_ignore, proc, args])
- }
- "A" : { csarg(S) # had better be in one input
- do_ignore(S, template[2:0], saved_ignore, proc, args)
- }
- "[" : if optwhite() & ="[" then { # optional argument: discarded when present, skipped when absent
- delay_newtext(S)
- with_upto_bracket(S, "", ignore_bracket_plus,
- [S, template[2:0], saved_ignore, proc, args])
- } else
- do_ignore(S, template[2:0], saved_ignore, proc, args)
- "C" : # a total cheat, means ``copy optional arg''
- if optwhite() & ="[" then {
- S.ignoring := &null # ignoring is switched off while the optional argument is handled
- delay_newtext(S)
- with_upto_bracket(S, "", copy_bracket_plus,
- [S, template[2:0], saved_ignore, proc, args])
- } else
- do_ignore(S, template[2:0], saved_ignore, proc, args)
- "=" : { delay_newtext(S) # assignment: eat_assignment consumes it, then calls do_ignore again
- eat_assignment(S, do_ignore, [S, template[2:0], saved_ignore, proc,args])
- }
- "*" : { (="*", optwhite()) # star is consumed if present; its absence is not an error
- do_ignore(S, template[2:0], saved_ignore, proc, args)
- }
- "(" : { (="(", tab(upto(')')), =")", optwhite()) # parenthesized text, matched within the current input only
- do_ignore(S, template[2:0], saved_ignore, proc, args)
- }
- }
- else { # template exhausted: restore S.ignoring and apply proc to args when both are non-null
- S.ignoring := saved_ignore
- (\proc)!(\args)
- procedure ignore_til_close(S, template, saved_ignore, proc, args) # open hook for the "{" case: resume do_ignore before the next close
- before_next_close(S, do_ignore, [S, template, saved_ignore, proc, args])
- Finally, at the end of an ignored environment, do nothing.
- <<*>>=
- procedure do_nothing(S, cs, cl) # placeholder handler installed by ignoreenv for the end of an ignored environment
- return
- \subsubsection{Parsing bracketed (optional) arguments}
- We may have to deal with optional arguments that are split across lines.
- We pass in a continuation for the bracket.
- This is a lot like gobbling to a newline, which we had to do with a comment.
- As in the other case, we do something stupid if the bracket is
- protected (e.g. by a backslash or comment char).
- <<*>>=
- procedure with_upto_bracket(S, bracketed_text, proc, args) # accumulate text up to "]", then call proc(S, text, args)
- bracketed_text ||:= tab(upto(']') | 0) # take everything up to "]", or the rest of the input when there is none
- if pos(0) then # no "]" in this input: re-register with the text gathered so far
- before_next_newtext(S, with_upto_bracket, [S, bracketed_text, proc, args])
- else {
- ="]"
- undelay_newtext(S) # callers in this file call delay_newtext before starting
- (\proc)(S, bracketed_text, args) # args is handed over as one value, not spread into arguments
- return
- end
- To ignore brackets:
- <<*>>=
- procedure ignore_bracket_plus(S, contents, args) # continuation for template code "[": drop the text and resume do_ignore
- # contents are ignored
- do_ignore!args # args is the argument list that do_ignore built for its own continuation
- @ and to copy them
- <<*>>=
- procedure copy_bracket_plus(S, contents, args) # continuation for template code "C": meant to convert and emit the optional argument
- local text
- text := args[3] | fail # saved_ignore arg to do_ignore
- text(S, convert(converter("H"), "{" || contents || "}")) # NOTE(review): cs_ignore stores the old S.ignoring value (null or 1) in this slot, not a procedure; S.text may be what is intended -- confirm
- do_ignore!args
- \subsubsection{Ignoring assignments}
- Assignments are tricky because they might involve numbers, control
- sequences, dimensions, or even glue.
- We approximate the syntax from page 275 in the \TeX book.
- <<*>>=
- procedure eat_assignment(S, proc, args) # consume the right-hand side of an assignment, then apply proc to args
- static decimal_chars
- initial decimal_chars := &digits ++ '.,+-'
- optwhite()
- ="=" # so what if we swallow multiple = signs
- optwhite()
- if pos(0) then { # value not in this input: re-register and stop here
- before_next_newtext(S, eat_assignment, [S, proc, args])
- return
- } else if glue() then { # finished
- } else if any(decimal_chars) then { # a number, optionally followed by a control sequence
- tab(many(decimal_chars))
- optwhite()
- if ="\\" then
- tab(many(S.csletters)) | move(1) # control word, or else a single-character control sequence
- # else assume assignment of the form \hangafter=2
- } else if ="\\" then # a bare control sequence as the value
- tab(many(S.csletters)) | move(1)
- undelay_newtext(S)
- (\proc)!args
- <<*>>=
- procedure dimen() # match a dimension at the scanning position: optional sign, number, optional "true", unit
- static decimal_chars
- initial decimal_chars := &digits ++ '.,'
- suspend (optwhite(),
- if any('+-') then (move(1), optwhite()) else "", # optional sign
- tab(many(decimal_chars)), optwhite(), # digits with "." or "," accepted
- (="true", optwhite()) | &null, # optional "true" keyword
- =("em"|"ex"|"pt"|"pc"|"in"|"bp"|"cm"|"mm"|"dd"|"cc"|"sp"|"mu")) # the unit is required
- <<*>>=
- procedure glue() # match a dimension with optional "plus" and "minus" components
- suspend (dimen(),
- (optwhite(), ="plus", dimen()) | "", # optional stretch
- (optwhite(), ="minus", dimen()) | "") # optional shrink
- \subsection{Substitution}
- \subsubsection{Simple substitution for a single control sequence}
- Even simple substitution isn't so simple, because in addition to the
- HTML that we substitute for the {\TeX}, we can also supply a template
- of stuff to be ignored (like the optional argument to \verb+\\+).
- <<*>>=
- procedure substitution(tex, html, ignore_template) # declare that control sequence tex is replaced by the text html
- # ignore mode for now
- cstab[tex] := Cemit_ig
- csclosure[tex] := emit_ig_cl(html, \ignore_template | "") # template of things to ignore defaults to the empty string
- The closure contains HTML to be written and a template to be ignored.
- <<*>>=
- record emit_ig_cl(html, template)
- procedure Cemit_ig(S, cs, cl) # emit cl.html, then ignore whatever cl.template describes
- S.text(S, cl.html)
- if *cl.template > 0 then # an empty template means there is nothing to ignore
- cs_ignore(S, cs, cl.template)
- \subsubsection{Substitution for environments}
- The [[envblock]] procedure has two forms:
- \begin{itemize}
- \item
- {}[[envblock(]]{\it environment}, {\it tag}[[)]] simply uses
- begin- and end-{\it tag} in place of the environment.
- \item
- {}[[envblock(]]{\it environment}, {\it left}, {\it right}, {\it
- ignore}[[)]]
- puts the {\it left} text at the beginning of the environment, the {\it
- right} text at the end, plus at the beginning of the environment it
- ignores the arguments described by {\it ignore}.
- \end{itemize}
- It's easier to implement than to describe.
- <<*>>=
- procedure envblock(env, left, right, ignore_template) # declare the text emitted at the begin and end of environment env
- /ignore_template := ""
- begintab[env] := Cemit_ig
- begincl[env] := emit_ig_cl(if /right then tag(left) else left, ignore_template) # without right, left is a tag name
- endtab[env] := Cemit
- endcl[env] := if /right then endtag(left) else right # without right, the matching end tag is used
- @ [[Cemit]] emits text with nothing to ignore.
- <<*>>=
- procedure Cemit(S, cs, cl) # emit the closure cl as text; envblock stores the end text there
- S.text(S, cl)
- \subsubsection{Substitution around arguments of control sequences}
- These substitutions place tags at the beginning and end of arguments
- to control sequences, instead of surrounding the contents of an
- environment.
- For example, they specify how to convert [[\section{...}]] to
- [[<h1>...</h1>]] and so forth.
- The calling convention is as for [[envblock]].
- <<*>>=
- record blockpair(left, right, ignore)
- procedure argblock(tex, html, right, ignore) # declare the text placed around the argument of control sequence tex
- # called the same way as envblock
- /ignore := ""
- cstab[tex] := Cblock
- csclosure[tex] :=
- if /right then blockpair (tag(html), endtag(html), ignore) # without right, html is a tag name
- else blockpair (html, right, ignore) # otherwise html and right are the literal left and right text
- @ There is a fine point; control sequences labelled with [[argblockv]]
- should put the converter into vertical mode.
- <<*>>=
- procedure argblockv(tex, html, right, ignore) # like argblock, but the handler is CblockV, which sets S.mode to "V" first
- argblock(tex, html, right, ignore)
- cstab[tex] := CblockV # replaces the Cblock entry that argblock just stored
- <<*>>=
- procedure Cblock(S, cs, cl, done_ignoring) # wrap the argument of cs in cl.left and cl.right, after ignoring what cl.ignore describes
- if /done_ignoring & *cl.ignore > 0 then { # first pass: do the ignoring, then re-enter with done_ignoring set to 1
- cs_ignore(S, cs, cl.ignore, Cblock, [S, cs, cl, 1])
- } else if pos(0) then { # argument not in this input
- after_next_newtext(S, do_cs, [S, cs, cl]) # NOTE(review): goes through do_cs without done_ignoring -- confirm the template is not applied twice
- } else if match("{") then { # braced argument: right text is emitted before the matching close
- S.text(S, cl.left)
- after_next_open(S, before_next_close, [S, emit_text, [S, cl.right]])
- } else { # unbraced argument: read it with csarg and emit everything at once
- S.text(S, cl.left || csarg(S) || cl.right)
- return
- <<*>>=
- procedure CblockV(S, cs, cl) # set the converter's mode to "V", then behave as Cblock
- S.mode := "V"
- Cblock(S, cs, cl)
- return
- \subsection{Table environments}
- For tables, we not only have an HTML tag, we also supply some text
- for the ampersand.
- [[args]] is a template describing the arguments to the environment,
- which are ignored.
- <<*>>=
- record table_closure(args, amp, html)
- procedure table_env(env, args, amp, html) # declare a table-like environment: args is an ignore template, amp the ampersand text, html a tag name
- begintab[env] := Ctable
- begincl[env] := table_closure(args, amp, html)
- endtab[env] := Ctable_end
- endcl[env] := [] # list that Ctable pushes the previous ampersand text onto
- <<*>>=
- procedure Ctable(S, env, cl) # begin a table environment: install its ampersand text and save the previous one
- local amp
- amp := S.ampersand
- S.ampersand := cl.amp
- S.text(S, tag(\cl.html)) # opening tag is emitted only when an html tag name was given
- push(endcl[env], amp) # saved for the end of the environment
- cs_ignore(S, env, cl.args) # ignore the environment's arguments
- procedure Ctable_end(S, env, cl) # end of the environment: restore the ampersand text and emit the closing tag
- S.ampersand := pop(cl) # presumably cl is endcl[env], the list Ctable pushed onto -- confirm against the dispatcher
- S.text(S, endtag(\begincl[env].html))
- \subsection{Control-sequence assignment}
- This procedure is available to be used for dynamic assignment.
- One day we might use it to parse \verb+\let+ as well.
- <<*>>=
- procedure let(lhs, rhs) # give control sequence lhs the handler and closure currently bound to rhs
- cstab[lhs] := cstab[rhs]
- csclosure[lhs] := csclosure[rhs]
- \section{HTML formatting}
- \label{html-format}
- First, generic procedures used to create beginning and ending tags.
- <<*>>=
- procedure tag(html) # opening tag for the element name html
- return "<" || html || ">"
- procedure endtag(html) # closing tag for the element name html
- return "</" || html || ">"
- Next, a gazillion formatting procedures.
- <<*>>=
- procedure Ccomment(S) # emit the non-empty S.comment as an HTML comment, then clear it
- if *S.comment > 0 then {
- S.text(S, "<!--")
- S.comment ? {
- while S.text(S, tab(find("--"))) do { # each "--" in the text is replaced by "- - " (presumably so it cannot end the HTML comment)
- move(2)
- S.text(S, "- - ")
- }
- S.text(S, tab(0)) # remainder after the last "--"
- }
- S.text(S, "-->")
- S.comment := ""
- return
- <<*>>=
- procedure Cparagraph(S) # emit a paragraph tag
- S.text(S, "<p>")
- <<*>>=
- procedure Cmath(S) # start of math: open a group, then switch to italics
- <<take open-group actions>>
- S.text(S, "<i>")
- procedure Cmath_end(S) # end of math: close the italics, then close the group
- S.text(S, "</i>")
- <<take close-group actions>>
- <<*>>=
- procedure Cdisplaymath(S) # start of display math: open a group, then an italic block quote
- <<take open-group actions>>
- S.text(S, "<blockquote><i>")
- procedure Cdisplaymath_end(S) # end of display math: close the tags, then close the group
- S.text(S, "</i></blockquote>")
- <<take close-group actions>>
- <<*>>=
- procedure Cmakeatletter(S) # add '@' to the set of characters S.csletters
- S.csletters ++:= '@'
- procedure Cmakeatother(S) # remove '@' from S.csletters again
- S.csletters --:= '@'
- Approximate \verb+\kill+ by eliminating text.
- <<*>>=
- procedure Ckill(S, cs, cl) # \kill: discard the text held in S.the_text
- S.the_text := ""
- \section{Support for adding control sequences dynamically}
- The idea is to use formal comments of the form:
- \begin{quote}
- \verb+% l2h function arg arg ...+
- \end{quote}
- These comments have the same effect as the procedure calls in
- the chunk [[<<control-sequence assignments>>]].
- Our first step is to create a table with the names of the functions we
- recognize.
- Ordinarily this table would be distributed, but I created it after the
- fact with a little quick Unix pipeline.
- <<*>>=
- global csfunctions
- <<initialization>>=
- csfunctions := table()
- <<assign to dynamic-add table>>=
- csfunctions["argblock"] := argblock
- csfunctions["argblockv"] := argblockv
- csfunctions["envblock"] := envblock
- csfunctions["fontchange"] := fontchange
- csfunctions["ignore"] := ignore
- csfunctions["ignoreenv"] := ignoreenv
- csfunctions["let"] := let
- csfunctions["listenv"] := listenv
- csfunctions["substitution"] := substitution
- Now, the tough issue is how to parse arguments. I'm going to try the
- following initial strategy: arguments are separated by spaces.
- To put a space within an argument, use \verb+#+. There is no way to
- put a \verb+#+ within an argument.
- <<*>>=
- procedure parse_dynamic_add(S) # parse an "l2h function arg ..." request at the scanning position and call the function
- if (optwhite(), =("l2h"|"sl2h"), skipwhite(), # keyword followed by mandatory white space
- p := tab(upto(' \t')), <<make [[p]] a good function or warn and [[fail]]>>,
- skipwhite(), any(~'\n')) then { # at least one argument is required
- a := []
- while any(~'\n') do { # arguments are separated by blanks or tabs, up to the newline
- put(a, map(tab(upto(' \t\n') | 0), "#", " ")) # "#" inside an argument stands for a space
- skipwhite()
- }
- p!a # call the function with the collected arguments
- return
- <<make [[p]] a good function or warn and [[fail]]>>=
- ((p := \csfunctions[p]) |
- { dynamic_warn(p); fail })
- <<*>>=
- procedure dynamic_warn(p) # warn on standard error, once per distinct name p, that the l2h function p is unknown
- static badprocs # names already warned about
- initial badprocs := set()
- if not member(badprocs, p) then {
- write(&errout, "Warning: % l2h ", p, " not recognized -- ignored")
- insert(badprocs, p)
- @
- \section{Miscellaneous utilities}
- [[optwhite]] skips and returns optional white space.
- <<*>>=
- procedure optwhite() # consume a run of blanks and tabs if present; always succeeds
- suspend tab(many(' \t')) | "" # the empty-string alternative makes the white space optional
- @ [[skipwhite]] insists that there must be some white space.
- <<*>>=
- procedure skipwhite() # consume a run of blanks and tabs; fails when there is none
- suspend tab(many(' \t'))
- \section{Main program for a noweb filter}
- First, this is how we use the converter as a noweb filter.
- <<l2h.icn>>=
- <<*>>
- procedure main(args) # noweb filter entry point: pass every line of standard input through filter
- local line
- every arg := !args do
- case arg of {
- "-show-unknowns" : show_unknowns := 1
- default : write(&errout, "l2h filter: unknown arg ", image(arg))
- }
- while line := read() do
- apply(filter, line)
- warn_unknown(\unknown_set, "control sequences", "\\") # called only when unknown_set is non-null
- warn_unknown(\unknown_envs, "environments", "{", "}") # called only when unknown_envs is non-null
- procedure apply(pass, line) # split a line of the form "@name arg" and call pass(name, arg); arg is null when no blank follows the name
- line ? (="@" & pass(tab(upto(' ')|0), if =" " then tab(0) else &null))
- This is noweb filter machinery. I really ought to coordinate quoted text
- with the converter (so it always shows up in the right place),
- but so far I'm too lazy.
- <<l2h.icn>>=
- global curfile, curline
- procedure filter(name, arg) # handle one "@name arg" line: convert documentation text, copy everything else through
- static S, C, code # S converts documentation, C converts chunk names, code is non-null inside a code chunk
- initial { S := converter("V"); C := converter("H") }
- case name of {
- "begin" : {<<out>>; if match("code ", arg) then code := 1}
- "end" : {<<out>>; code := &null; S.mode := "V"}
- "quote" : { outtext("\0" ? convert(S)) } # "\0" and "\1" are sent through the converter; outtext turns them back into quote markers
- "endquote" : { outtext("\1" ? convert(S)) }
- "file" : {<<out>>; curfile := arg; curline := 1}
- "line" : {<<out>>; curline := integer(arg)}
- "defn" : { write("@", name, " ", "{" || arg || "}" ? convert(C)); reset(C) } # chunk names are converted inside braces with C
- "use" : { write("@", name, " ", "{" || arg || "}" ? convert(C)); reset(C) }
- "text" : {if \code then <<out>> else outtext(arg ? convert(S))} # text inside code chunks is copied unchanged
- "nl" : {if \code then <<out>> else outtext("\n" ? convert(S)); curline +:= 1}
- default : {<<out>>}
- return
- <<out>>=
- write("@", name, (" " || \arg) | "")
- <<l2h.icn>>=
- procedure outtext(s) # write the string s as a sequence of @text, @nl, @quote and @endquote lines
- s ?
- while not pos(0) do
- if ="\n" then write("@nl")
- else if ="\0" then write("@quote") # "\0" stands for the start of quoted text
- else if ="\1" then write("@endquote") # "\1" stands for the end of quoted text
- else write("@text ", tab(upto('\n\0\1') | 0)) # ordinary text up to the next marker or the end
- return
- <<l2h.icn>>=
- procedure error(args[]) # write args to standard error, prefixed with the current file (when set) and line number
- return write!([&errout, (\curfile || ", ") | "", "line ", curline, ": "] ||| args)
- \section{Main program for a simple converter}
- <<sl2h.icn>>=
- <<*>>
- global curfile
- procedure main(args) # standalone converter: convert each named file, or standard input when no file was opened
- S := converter("V")
- every arg := !args do
- if arg[1] == "-" then # arguments starting with "-" are options
- case arg of {
- "-show-unknowns" : show_unknowns := 1
- default : write(&errout, "Warning: unrecognized option ", arg)
- }
- else if f := open(curfile <- arg) then # reversible assignment: curfile keeps its old value if open fails
- while line := read(f) do writes(convert(S, line || "\n"))
- else
- write(&errout, "Error: Can't open file ", arg)
- if /curfile then # no file was opened: read standard input
- while line := read() do writes(convert(S, line || "\n"))
- warn_unknown(\unknown_set, "control sequences", "\\") # called only when unknown_set is non-null
- warn_unknown(\unknown_envs, "environments", "{", "}") # called only when unknown_envs is non-null
- \section{Chunks}
- \nowebchunks
- \begin{multicols}{2}[\section{Index}]
- \nowebindex
- \end{multicols}
- \end{document}
-